#Alexander F. Gazmararian
#afg2@princeton.edu

#load packages
library(tidyverse)
library(dfadjust)
library(ggplot2)
library(dplyr)
library(kableExtra)
library(modelsummary)
library(margins)
library(lmtest)
library(sandwich)

#load data
g <- readRDS("data/NatYouthQual.rds")

# load functions
source("code/fun/book_theme.r")
source("code/fun/savefig.r")
source("code/fun/fix_txt.r")

# load coefficient names
source("code/fun/coefnames4tables.r")
coefnames <- c("transp_treat"="Transparency Treatment",
               "transp_treat:youth_blockYouth"="Transparency Treatment x Youth",
               "transp_treat:newsall_i" = "Transparency Treatment x Local News (Objective)",
               "newsall_i" = "Local News (Objective)",
               "transp_treat:Mediaworks"="Transparency Treatment x Local News (Subjective)",
               "Mediaworks"="Local News (Subjective)",
               "transp_treat:newsall_and"="Transparency Treatment x Local News (Objective and Subjective)",
               "newsall_and"="Local News (Objective and Subjective)",
               coefnames)

# Estimate ATE of transparency ----

# Create function to estimate model with HC2 standard errors and confidence intervals
get_intervals <- function(model, var) {
  require(dfadjust)
  out <- dfadjustSE(model)$coefficients
  out_df <- data.frame(out)
  out_df <- subset(out_df, select = c(Estimate, HC2.se))
  out_df$Estimate
  out_df$HC2.se
  out_df$lb95 <- out_df$Estimate + qnorm(.025) * out_df$HC2.se
  out_df$ub95 <- out_df$Estimate + qnorm(.975) * out_df$HC2.se
  out_df$lb90 <- out_df$Estimate + qnorm(.05) * out_df$HC2.se
  out_df$ub90 <- out_df$Estimate + qnorm(.95) * out_df$HC2.se
  out_vars <- out_df[var,]
  return(out_vars)
}

# Create function to estimate the model, using the non-adjusted partisanship measure
estimate_models <- function(outcome, treatment, before = FALSE, df = g) {
  f.base <- as.formula(paste(outcome, "~", treatment, "+ age + Female + PartySummary + Black + Hispanic + HighestEd + income5 + employfull + youth_block + pc1 + mobility_f + ruralhrsa"))
  if(isTRUE(before)) {
    dat <- subset(df, before == 0)
    #The control condition says the government will not provide transparency. An early
    #version varied whether the control mentioned transparency. However, two concerns
    #emerged: first, the contrast between transparency and no mention of transparency did
    #not mirror the political debates where the choice was between transparency and none at
    #all; second, the treatment could prime respondents to be worried about local benefits.
    #We addressed these concerns by randomizing whether the prompt included the world
    #“not,” which matched debates over transparency and kept priming constant across
    #conditions. We were only able to make this change on the national public survey since
    #the local policymaker survey was already in the field. So the one outcome where we do
    #not trim responses from the pretest is the jobs going to locals outcome, which is on both
    #surveys. This should introduce bias against an effect.
  }
  else {
    dat <- g
  }
  #estimate ATE
  m.ate <- lm(f.base, dat)
  #plot output
  models <- list(m.ate)
  m.ci <- lapply(models, get_intervals, var = treatment)
  m.df <- do.call("rbind", m.ci)
  m.df$model <- c("ATE")
  m.df$outcome <- outcome
  return(list(m.df,m.ate))
}

# Estimate models
localjobs <- estimate_models("I(transplocal/100)", "transp_treat")
train <- estimate_models("transptrain_i", "transp_treat", before = TRUE)
detect <- estimate_models("transpdetect_i", "transp_treat", before = TRUE)
account <- estimate_models("transpaccount_i", "transp_treat", before = TRUE)
vote <- estimate_models("transpvote_i", "transp_treat", before = TRUE)
tindex <- estimate_models("transp_index", "transp_treat")
transp.df <- rbind(localjobs[[1]], train[[1]], detect[[1]], account[[1]], vote[[1]], tindex[[1]])
transp.df$model <- with(transp.df, ifelse(outcome == "transp_index", "Index", "Individual"))
transp.df <-
  transp.df %>%
  dplyr::mutate(
    outcome = dplyr::recode(
      outcome,
      "I(transplocal/100)" = "Jobs going to locals",
      "transpvote_i" = "Vote for pro-green investment politician",
      "transptrain_i" = "Find local green jobs after training",
      "transpdetect_i" = "Detect failure to create jobs",
      "transpaccount_i" = "Companies held accountable",
      "transp_index" = "Local Benefit Index"
    ),
    outcome = factor(
      outcome,
      ordered = TRUE,
      levels = rev(c(
        "Jobs going to locals",
        "Find local green jobs after training",
        "Vote for pro-green investment politician",
        "Detect failure to create jobs",
        "Companies held accountable",
        "Local Benefit Index"
      ))
    )
  )

## Create Figure 8.3 ----
transp.plot <-
  transp.df %>%
  ggplot() +
  geom_hline(yintercept = 0, color = "grey", lty = "dashed") +
  geom_errorbar(aes(x = outcome, ymin = lb95, ymax = ub95), position = position_dodge(.7), width = 0) +
  geom_errorbar(aes(x = outcome, ymin = lb90, ymax = ub90), position = position_dodge(.7), width = 0, size = 1.75) +
  geom_point(aes(x = outcome, y = Estimate), position = position_dodge(.7), size = 4) +
  labs(x = "", y = "Estimate",
       caption = "Notes: Thin and thick bars represent 95 and 90 percent confidence intervals.") +
  coord_flip() +
  facet_grid(rows=vars(model),scales="free_y",space="free") +
  book_theme +
  theme(strip.text = element_blank(),plot.margin = margin(r=5))
transp.plot
savefig(transp.plot, "8.3_figure_nattransp", height = 2.25, filepath = "figures/")

## Create table for online appendix ----
m.bin <- list(localjobs[[2]], train[[2]], detect[[2]], account[[2]], vote[[2]], tindex[[2]])
file <- "tables/ch8/ols_transp_youth.txt"
modelsummary(
  m.bin,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  vcov = "HC2",
  gof_map = c("nobs", "adj.r.squared"),
  coef_map = coefnames,
  col.names = c(" ","Local","Train","Detect","Account","Vote","Index"),
  escape = FALSE,
  output = "latex"
) %>%
  cat(., file = file)
fix_txt(file)

# Estimate continuous models
f.base <- y ~ transp_treat + youth_block + age + Female + PartySummary + Black + Hispanic + HighestEd + income5 + employfull + mobility + ruralhrsa + pc1

cont <- list()
cont[["Local"]] <- lm(update(f.base, I(transplocal/100) ~ .), g)
cont[["Train"]] <- lm(update(f.base, as.numeric(TranspTrain) ~ .), g, subset = (before == 0))
cont[["Train (Youth)"]] <- lm(update(f.base, as.numeric(TranspTrain) ~ .+ transp_treat * youth_block), g, subset = (before == 0))
cont[["Detect"]] <- lm(update(f.base, as.numeric(TranspDetect) ~ .), g, subset = (before == 0))
cont[["Account"]] <- lm(update(f.base, as.numeric(TranspAccount) ~ .), g, subset = (before == 0))
cont[["Vote"]] <- lm(update(f.base, as.numeric(TranspVote) ~ .), g, subset = (before == 0))
cont[["Index"]] <- lm(update(f.base, as.numeric(transp_index) ~ .), g)

# Create table
file <- "tables/ch8/ols_transp_cont.txt"
modelsummary(
  cont,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  vcov = "HC2",
  gof_map = c("nobs", "adj.r.squared"),
  coef_map = coefnames,
  escape = FALSE,
  output = "latex"
) %>%
  cat(., file = file)
fix_txt(file)

# Moderating Effect of Local Media ----

# Success of the local media is the fourth most-mentioned topic
g %>%
  summarize(across(c(GovMistrust,SmallTown,SocialMedia,Mediaworks,Whistleblower,Mediafails,CommunityInterest,CommunityDisinterest,CompMistrust), ~ mean(.x, na.rm = TRUE))) %>%
  pivot_longer(cols = c(GovMistrust:CompMistrust)) %>%
  arrange(desc(value))

# Examine open-ended responses mentioning that the media works in one's area
subset(g, Mediaworks == 1)$DetectOE.

# Estimate regression
m <- list()
v <- list()

m[["Objective"]] <- lm(transpaccount_i ~ transp_treat * newsall_i + age + Female + PartySummary + Black + Hispanic + HighestEd + Income_Incl + Employment + Ideo_simple + mobility_f + ruralhrsa + youth_block + state + pc1, g, subset = (before == 0))
v[["Objective"]] <- vcovHC(m$Objective, "HC1")

m[["Subjective"]] <- lm(transpaccount_i ~ transp_treat * Mediaworks + age + Female + PartySummary + Black + Hispanic + HighestEd + Income_Incl + Employment + Ideo_simple + mobility_f + ruralhrsa + youth_block + state + pc1, g, subset = (before == 0))
v[["Subjective"]] <- vcovHC(m$Subjective, "HC1")

m[["Objective and subjective"]] <- lm(transpaccount_i ~ transp_treat * newsall_and + age + Female + PartySummary + Black + Hispanic + HighestEd + Income_Incl + Employment + Ideo_simple + mobility_f + ruralhrsa + youth_block + state + pc1, g, subset = (before == 0))
v[["Objective and subjective"]] <- vcovHC(m$`Objective and subjective`, "HC1")

ame <- list()
ame[["Objective"]] <- margins_summary(m$Objective, variables = "transp_treat", at = list(newsall_i = 0:1), vcov = v$Objective)
ame[["Subjective"]] <- margins_summary(m$Subjective, variables = "transp_treat", at = list(Mediaworks = 0:1), vcov = v$Subjective)
ame[["Objective and subjective"]] <- margins_summary(m$`Objective and subjective`, variables = "transp_treat", at = list(newsall_and = 0:1), vcov = v$`Objective and subjective`)

ame.df <- lapply(ame, function(x) {
  df <- data.frame(x)
  names(df) <- c("treat", "group", "cate", "se", "z", "p", "lb", "ub")
  return(df)
})

ame.df <- bind_rows(ame.df, .id = "Model")

## Create Figure 8.5 ----
plot.media <-
  ame.df %>%
  mutate(
    outcome = factor(Model, ordered = TRUE, levels = c("Objective and subjective", "Subjective", "Objective")),
    lb90 = cate + se * qnorm(.05),
    ub90 = cate + se * qnorm(.95),
    group = ifelse(group==1,"News presence", "News desert")
  ) %>%
  ggplot() +
  geom_hline(yintercept=0,lty="dashed",color="grey") +
  geom_errorbar(aes(x=outcome,ymin=lb,ymax=ub,color=group),position=position_dodge(.4),width=0)+
  geom_errorbar(aes(x=outcome,ymin=lb90,ymax=ub90,color=group),size=1.75,position=position_dodge(.4),width=0)+
  geom_point(aes(x=outcome,y=cate,color=group,shape=group),position=position_dodge(.4),size=4)+
  book_theme+
  scale_color_manual(values = c("grey", "black")) +
  labs(x="",y="Estimate",color="",shape="",caption="Notes: Thin and thick bars represent 95 and 90 percent confidence intervals.")+
  coord_flip()+
  theme(legend.position="right")
plot.media
savefig(plot.media, "8.5_figure_newsdesert", height=1.8, filepath = "figures/")

## Create table for online appendix ----
file <- "tables/ch8/ols_transp_news.txt"
modelsummary(
  m,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  vcov = "HC1",
  coef_omit = "state|Income",
  col.names = c("", "(1)", "(2)", "(3)"),
  coef_map = coefnames,
  gof_map = c("nobs", "adj.r.squared"),
  escape = TRUE,
  output = "latex",
  add_rows = data.frame(
    c("State Fixed Effects","Income Control"),
    c("Yes","Yes"),
    c("Yes","Yes"),
    c("Yes","Yes")
  )
) %>%
  cat(., file = file)
fix_txt(file)
